library(data.table)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.1 v dplyr 1.0.6
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::between() masks data.table::between()
## x dplyr::filter() masks stats::filter()
## x dplyr::first() masks data.table::first()
## x dplyr::lag() masks stats::lag()
## x dplyr::last() masks data.table::last()
## x purrr::transpose() masks data.table::transpose()
library(ggplot2)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(heatmaply)
## Loading required package: plotly
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
## Loading required package: viridis
## Loading required package: viridisLite
##
## ======================
## Welcome to heatmaply version 1.3.0
##
## Type citation('heatmaply') for how to cite the package.
## Type ?heatmaply for the main documentation.
##
## The github page is: https://github.com/talgalili/heatmaply/
## Please submit your suggestions and bug-reports at: https://github.com/talgalili/heatmaply/issues
## You may ask questions at stackoverflow, use the r and heatmaply tags:
## https://stackoverflow.com/questions/tagged/heatmaply
## ======================
library(RColorBrewer)
library(vcd)
## Loading required package: grid
load("tf0.rdata")
#load("tf3.rdata")
#load("tf4.rdata")
#load("CX.rdata")
load("tf4.rdata")
par(mfrow=c(1,2),cex=0.7)
table(A0$age, useNA='ifany') %>% barplot(main="Age Groups",las=2) #人數分布在a34、a39、a44
table(A0$area, useNA='ifany') %>% barplot(main="Areas",las=2) #地區分布在南港區及汐止區
cats = Z0 %>%
group_by(cat) %>%
summarise(
noProd = n_distinct(prod), #取商品唯一ID
totalQty = sum(qty), #總銷售數量
totalRev = sum(price), #總收益
totalGross = sum(price) - sum(cost),#總毛利
grossMargin = totalGross/totalRev, #邊際毛利
avgPrice = totalRev/totalQty#平均價格
)
cats$ID <- as.character(cats$cat)
(g1 <- cats %>%
top_n(10, totalRev) %>%
ggplot(aes(x = ID, y = totalRev)) +
geom_col()) #560102及560402的銷售金額最高
(g2 <- cats %>%
top_n(10, totalGross) %>%
ggplot(aes(x = ID, y = totalGross)) +
geom_col()) #320402、560201、560402的總毛利最高
(g3 <- cats %>%
top_n(10, avgPrice) %>%
ggplot(aes(x = ID, y = avgPrice)) +
geom_col()) #平均價格前10名均落在3000~4000
(g4 <- cats %>%
top_n(10, grossMargin) %>%
ggplot(aes(x = ID, y = grossMargin)) +
geom_col()) #邊際毛利均落在0.4以上
top10_Rev = cats %>% top_n(10,totalRev)
top10_Gross = cats %>% top_n(10,totalGross)
top = merge(top10_Rev,top10_Gross,all = F)
col6 = c('seagreen','gold','orange',rep('red',3))
options(scipen = 999)
g = top %>% ggplot(aes(x=totalRev,y=totalGross,size=totalQty,col=avgPrice)) + geom_point(alpha=0.7)+ geom_text(aes(label=cat,size=0.4),col="black") + scale_size(range=c(5,20)) + scale_color_gradientn(colors=col6) + theme_bw()
ggplotly(g)
# (A0_rfm <- A0 %>%
# select(cust, r, s, f, m, rev) %>%
# summarize(cust, r, f, m, rev, avg_f = (s-r) / (f-1)))
# sum(is.na(A0_rfm$avg_f))
# table(A0_rfm$avg_f) %>%barplot
X0$wday = format(X0$date, "%u")
mtx1 = table(X0$age,X0$wday) %>% prop.table(1)
mtx1 = as.data.frame.matrix(mtx1)
heatmaply(mtx1,Rowv=F,Colv=F)
#年輕人特別喜歡在周末來消費,而老人消費頻率則較為平均
mtx2 = table(X0$area,X0$wday) %>% prop.table(1)
mtx2 = as.data.frame.matrix(mtx2)
heatmaply(mtx2,Rowv=F,Colv=F)
#各地區的人普遍都喜歡在周末消費,其中以信義區和松山區更為明顯
top_10 = cats %>% top_n(10, totalGross) %>% pull(cat)
Z_top10 = Z0 %>% filter(cat %in% top$cat)
Z_top10$wday = format(Z_top10$date, "%u")
Z_top10 = Z_top10 %>% mutate(Gross = price-cost)
a = Z_top10 %>% count(cat,wday,wt=Gross)
xtabs(n~cat+wday,data=a) %>% as.data.frame.matrix %>% heatmaply(Rowv=F,Colv=F)
top10_Rev = cats %>% top_n(10,totalRev)
top10_Gross = cats %>% top_n(10,totalGross)
top = merge(top10_Rev,top10_Gross,all = F)
MOSA = function(formula, data) mosaic(formula, data, shade=T,
margins=c(0,1,0,0), labeling_args = list(rot_labels=c(90,0,0,0)),
gp_labels=gpar(fontsize=9), legend_args=list(fontsize=9),
gp_text=gpar(fontsize=7),labeling=labeling_residuals)
MOSA(~cat+age, Z0[Z0$cat %in% top$cat,])
A0$cust <- as.numeric(A0$cust)
A0_cluster <- A0 %>%
select(cust, r, s, f, m)
A0_cluster_scale = scale(A0_cluster[,c(2:5)]) %>% data.frame
sapply(A0_cluster_scale, mean)
## r s
## 0.000000000000000020673174 0.000000000000000076871627
## f m
## -0.000000000000000001795684 0.000000000000000031208240
sapply(A0_cluster_scale,sd)
## r s f m
## 1 1 1 1
d = dist(A0_cluster_scale, method="euclidean")#歐式距離
hc = hclust(d, method='ward.D') #華德法
plot(hc)
kg = cutree(hc, k=7)
table(kg)
## kg
## 1 2 3 4 5 6 7
## 5884 7549 7439 2262 5310 2863 934
names(A0_cluster_scale) =c(
"最近消費天數","第一次消費天數","頻率","平均交易金額")
kg1 = kg %>% as.factor() %>% as.data.frame()
colnames(kg1) <- "group"
sapply(split(A0_cluster,kg), colMeans) %>% round(2) # 原始尺度
## 1 2 3 4 5 6
## cust 1442167.64 1504745.41 1539985.66 1528642.46 1448635.80 1349386.49
## r 24.63 22.08 88.19 47.67 19.46 8.43
## s 98.30 32.37 93.57 53.50 101.98 109.90
## f 3.77 1.66 1.38 1.32 4.01 9.60
## m 500.61 692.20 736.40 3218.22 1647.82 606.97
## 7
## cust 1280019.29
## r 4.81
## s 116.03
## f 24.16
## m 646.34
sapply(split(A0_cluster_scale,kg), colMeans) %>% round(2) # 標準尺度
## 1 2 3 4 5 6 7
## 最近消費天數 -0.38 -0.46 1.51 0.30 -0.53 -0.86 -0.97
## 第一次消費天數 0.51 -1.42 0.38 -0.80 0.62 0.85 1.03
## 頻率 0.02 -0.42 -0.48 -0.49 0.06 1.22 4.24
## 平均交易金額 -0.51 -0.31 -0.27 2.32 0.68 -0.40 -0.36
plot.new()
c = brewer.pal(12,"Set3")[2:5] # 設定顏色
par(cex=0.8,family="黑體-繁 中黑")
split(A0_cluster_scale,kg) %>% sapply(colMeans) %>% barplot(beside=T,col =c,legend = TRUE, xlim=c(0,45), names.arg = c("生活", "瞌睡", "新顧客", "沉睡", "潛力股", "忠實", "員工"), xlab="分群")
## Warning in axis(if (horiz) 2 else 1, at = at.l, labels = names.arg, lty =
## axis.lty, : font family not found in Windows font database
## Warning in axis(if (horiz) 1 else 2, cex.axis = cex.axis, ...): font family not
## found in Windows font database
## Warning in axis(if (horiz) 1 else 2, cex.axis = cex.axis, ...): font family not
## found in Windows font database
## Warning in axis(if (horiz) 1 else 2, cex.axis = cex.axis, ...): font family not
## found in Windows font database
## Warning in axis(if (horiz) 1 else 2, cex.axis = cex.axis, ...): font family not
## found in Windows font database
## Warning in axis(if (horiz) 1 else 2, cex.axis = cex.axis, ...): font family not
## found in Windows font database
## Warning in axis(if (horiz) 1 else 2, cex.axis = cex.axis, ...): font family not
## found in Windows font database
## Warning in axis(if (horiz) 1 else 2, cex.axis = cex.axis, ...): font family not
## found in Windows font database
## Warning in axis(if (horiz) 1 else 2, cex.axis = cex.axis, ...): font family not
## found in Windows font database
## Warning in axis(if (horiz) 1 else 2, cex.axis = cex.axis, ...): font family not
## found in Windows font database
## Warning in axis(if (horiz) 1 else 2, cex.axis = cex.axis, ...): font family not
## found in Windows font database
## Warning in axis(if (horiz) 1 else 2, cex.axis = cex.axis, ...): font family not
## found in Windows font database
group = A0_cluster %>% cbind(kg1) %>%
mutate (customer = case_when(group == 1 ~"生活型顧客",
group == 2 ~"沉睡型顧客",
group == 3 ~"沉睡型顧客",
group == 4 ~"節慶型顧客",
group == 5 ~"採購型顧客",
group == 6 ~"忠實顧客",
group == 7 ~"員工"))
cust_group <- group %>%
select(cust, group, customer) %>%
merge(A0, by = "cust")
MOSA(~group+age, data=cust_group)
MOSA(~group+area, data=cust_group)
```